import pandas as pd


def combination_sheet():
    """
    第一次结合，将两种文件的数据提取出来按照日期merge，然后预测
    """
    f = pd.ExcelFile('data2/CSV20201227-28.xlsx')
    f.sheet_names  # 获取工作表名称
    
    df = pd.DataFrame()
    for i in f.sheet_names:
        d = pd.read_excel('data2/CSV20201227-28.xlsx', sheet_name=i)
        df = pd.concat([df, d], axis=0)

    df1 = pd.read_excel('data2/20201227.xlsx')
    df2 = pd.read_excel('data2/20201228.xlsx')
    df1.时间HHMM = "12-27 " + df1.时间HHMM.astype('str')
    df2.时间HHMM = "12-28 " + df2.时间HHMM.astype('str')
    df1 = df1.loc[:, ['时间HHMM', '能见度m']]
    df2 = df2.loc[:, ['时间HHMM', '能见度m']]
    dff = pd.concat([df1, df2], axis=0)
    # dff = df2
    df.采样点时间 = df.采样点时间.astype('str').str[5:16].str.replace(':', '')
    
    df = df.merge(dff, left_on="采样点时间", right_on="时间HHMM", how="outer")
    df = df.dropna()
    df.iloc[:,1:].to_excel('data2/combination_data.xlsx', index=False)

def read_all_sheet(name):
    f = pd.ExcelFile(name)
    f.sheet_names  # 获取工作表名称
    
    df = pd.DataFrame()
    for i in f.sheet_names:
        d = pd.read_excel(name, sheet_name=i)
        df = pd.concat([df, d], axis=0)
    return df

def combination_sheet2():
    """
    第二次结合，做了空缺值补全
    """
    df1 = read_all_sheet('data3/CSV20201226-27.xlsx')
    df2 = read_all_sheet('data3/CSV20201227-28.xlsx')
    df = pd.concat([df1, df2], axis=0)
    df0 = pd.read_excel('data3/20201226.xlsx')
    df1 = pd.read_excel('data3/20201227.xlsx')
    df2 = pd.read_excel('data3/20201228.xlsx')
    df0.时间HHMM = "12-26 " + df0.时间HHMM.astype('str')
    df1.时间HHMM = "12-27 " + df1.时间HHMM.astype('str')
    df2.时间HHMM = "12-28 " + df2.时间HHMM.astype('str')
    pd.concat([df0, df1, df2], axis=0).to_excel('data3/complement_data.xlsx', index=False)
    df0 = df0.loc[:, ['时间HHMM', '能见度m']]
    df1 = df1.loc[:, ['时间HHMM', '能见度m']]
    df2 = df2.loc[:, ['时间HHMM', '能见度m']]
    dff = pd.concat([df0, df1, df2], axis=0)
    # dff = df2
    df.采样点时间 = df.采样点时间.astype('str').str[5:16].str.replace(':', '')
    
    df = df.merge(dff, left_on="采样点时间", right_on="时间HHMM", how="outer")
    df = df.dropna()
    df.iloc[:,1:].to_excel('data3/combination_data.xlsx', index=False)
    

if __name__ == "__main__":
    combination_sheet2()